Skip to main content
Version: v1.4.1

Regression - Combined Cycle Power Plant Energy Output

Predicting electrical power output from a combined cycle power plant based on ambient conditions.

Dataset Source: UCI ML Repository - Combined Cycle Power Plant

Problem Type: Regression

Target Variable: Electrical energy output (MW)

Use Case: Energy production optimization, power grid planning, efficiency analysis

Package Imports

1!pip install xplainable
2!pip install xplainable-client
1import pandas as pd
2import xplainable as ap
3from xplainable.core.models import XRegressor
4from xplainable.core.optimisation.genetic import XEvolutionaryNetwork
5from xplainable.core.optimisation.layers import Evolve, Tighten
6from xplainable_preprocessing import PipelineSpec, StepSpec, compile_spec
7from sklearn.model_selection import train_test_split
8import requests
9import json
10
11# Additional imports specific to this example
12import numpy as np
13import matplotlib.pyplot as plt
14import seaborn as sns
15from ucimlrepo import fetch_ucirepo
16
17from xplainable_client.client.client import XplainableClient
18from xplainable_client.client.base import XplainableAPIError

Xplainable Cloud Setup

1# Initialize Xplainable Cloud client
2client = XplainableClient(
3 api_key="", # Create an API key in Xplainable Cloud: https://platform.xplainable.io/
4 hostname="https://platform.xplainable.io"
5)
Out:

Connected to Xplainable Cloud

User: jtuppa

Hostname: http://localhost:8000

API Key Expires: 2025-08-27T11:19:40.744423

Python Version: 3.10.18

Xplainable Version: 1.3.0

Data Loading and Exploration

Load the Combined Cycle Power Plant dataset from UCI ML Repository.

1# Load dataset using ucimlrepo
2try:
3 # Fetch dataset
4 power_plant = fetch_ucirepo(id=294)
5
6 # Data (as pandas dataframes)
7 X = power_plant.data.features
8 y = power_plant.data.targets
9
10 # Combine features and target
11 df = pd.concat([X, y], axis=1)
12
13 # Display basic information
14 print(f"Dataset shape: {df.shape}")
15 print(f"\nFeatures:")
16 print("- AT: Ambient Temperature (°C)")
17 print("- V: Exhaust Vacuum (cm Hg)")
18 print("- AP: Ambient Pressure (millibar)")
19 print("- RH: Relative Humidity (%)")
20 print(f"\nTarget: Energy output (MW)")
21 print(f"\nTarget variable statistics:")
22 print(y.describe())
23
24 df.head()
25
26except Exception as e:
27 print(f"Error loading dataset: {e}")
28 print("Install ucimlrepo: pip install ucimlrepo")
29 print("\nCreating synthetic dataset for testing...")
30
31 # Create synthetic power plant dataset
32 import numpy as np
33 np.random.seed(42)
34
35 n_samples = 5000
36 df = pd.DataFrame({
37 'AT': np.random.normal(20, 7, n_samples), # Ambient Temperature
38 'V': np.random.normal(50, 15, n_samples), # Exhaust Vacuum
39 'AP': np.random.normal(1015, 20, n_samples), # Ambient Pressure
40 'RH': np.random.normal(70, 20, n_samples), # Relative Humidity
41 })
42
43 # Create realistic energy output based on power plant physics
44 df['PE'] = (
45 480 - df['AT'] * 2.5 + df['V'] * 0.8 +
46 (df['AP'] - 1000) * 0.1 - df['RH'] * 0.05 +
47 np.random.normal(0, 5, n_samples)
48 )
49
50 print(f"Synthetic dataset created: {df.shape}")
51 print(f"Target variable statistics:")
52 print(df['PE'].describe())
Out:

Error loading dataset: Error connecting to server

Install ucimlrepo: pip install ucimlrepo

Creating synthetic dataset for testing...

Synthetic dataset created: (5000, 5)

Target variable statistics:

count 5000.000000

mean 467.696309

std 21.852907

min 376.394588

25% 452.931049

50% 467.651713

75% 482.484708

max 544.432530

Name: PE, dtype: float64

1. Data Preprocessing

Preprocess the power plant operational data.

1# Define preprocessing pipeline using PipelineSpec
2# The power plant dataset is typically clean, but we ensure no missing values remain.
3spec = PipelineSpec(steps=[
4 StepSpec(
5 transformer="FillMissingTransformer",
6 params={"strategy": "median"}
7 )
8])
9
10pipeline = compile_spec(spec)
11df_processed = pipeline.fit_transform(df)
12
13print(f"Processed dataset shape: {df_processed.shape}")
14print(f"Missing values: {df_processed.isnull().sum().sum()}")
15
16# Display correlation matrix
17plt.figure(figsize=(10, 8))
18correlation_matrix = df_processed.corr()
19sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', center=0)
20plt.title('Correlation Matrix: Power Plant Variables')
21plt.show()
22
23df_processed.head()

Preprocessor Persistence

Save the preprocessing pipeline spec to Xplainable Cloud for reproducibility.

1# Persist the preprocessor to Xplainable Cloud
2# Uncomment to save preprocessor
3# try:
4# preprocessor_id = client.preprocessing.create_preprocessor(
5# spec=spec,
6# name="Power Plant Energy Output Preprocessor",
7# description="Fills missing values with median for power plant operational data"
8# )
9# print(f"Preprocessor created with ID: {preprocessor_id}")
10# except XplainableAPIError as e:
11# print(f"Error creating preprocessor: {e}")

Create Train/Test Split

1# Assuming the target column is the last one
2target_col = df_processed.columns[-1]
3X, y = df_processed.drop(columns=[target_col]), df_processed[target_col]
4
5X_train, X_test, y_train, y_test = train_test_split(
6 X, y, test_size=0.2, random_state=42
7)
8
9print(f"Training set: {X_train.shape[0]} samples")
10print(f"Test set: {X_test.shape[0]} samples")
11print(f"Features: {list(X.columns)}")
Out:

Training set: 4000 samples

Test set: 1000 samples

Features: ['AT', 'V', 'AP', 'RH']

2. Model Optimization

Optimize the model using genetic algorithms with evolutionary networks for power output prediction.

1# First train a base model
2model = XRegressor()
3model.fit(X_train, y_train)
4
5# Create evolutionary network for optimization
6network = XEvolutionaryNetwork(model)
7
8# Add optimization layers
9# Start with an initial Tighten layer
10network.add_layer(
11 Tighten(
12 iterations=100,
13 learning_rate=0.1,
14 early_stopping=20
15 )
16)
17
18# Add an Evolve layer with high severity for exploration
19network.add_layer(
20 Evolve(
21 mutations=100,
22 generations=50,
23 max_severity=0.5,
24 max_leaves=20,
25 early_stopping=20
26 )
27)
28
29# Add another Evolve layer with lower severity for refinement
30network.add_layer(
31 Evolve(
32 mutations=100,
33 generations=50,
34 max_severity=0.3,
35 max_leaves=15,
36 early_stopping=20
37 )
38)
39
40# Add a final Tighten layer with low learning rate for fine-tuning
41network.add_layer(
42 Tighten(
43 iterations=100,
44 learning_rate=0.025,
45 early_stopping=20
46 )
47)
48
49# Fit the network and run optimization
50network.fit(X_train, y_train)
51network.optimise()
Out:

0%| | 0/100 [00:00<?, ?it/s]

0%| | 0/50 [00:00<?, ?it/s]

0%| | 0/50 [00:00<?, ?it/s]

0%| | 0/100 [00:00<?, ?it/s]

<xplainable.core.optimisation.genetic.XEvolutionaryNetwork at 0x29630d0f0>

3. Model Training

The model has been trained and optimized through the evolutionary network process.

1# Model is already trained through the evolutionary network
2# Let's evaluate the optimized model performance
3train_performance = model.evaluate(X_train, y_train)
4print("Training Performance:")
5for metric, value in train_performance.items():
6 print(f"{metric}: {value:.4f}")
Out:

Training Performance:

Explained Variance: 0.9479

MAE: 3.8863

MAPE: 0.0083

MSE: 24.8965

RMSE: 4.9896

RMSLE: 0.0001

R2 Score: 0.9479

4. Model Interpretability and Explainability

Understand which ambient conditions most influence power plant energy output.

1model.explain()

5. Model Persistence (Optional)

Save the model to Xplainable Cloud.

1model_id, version_id = client.models.create_model(
2 model=model,
3 model_name="Power Plant Energy Output Model",
4 model_description="Predicting electrical power output from ambient conditions",
5 x=X_train,
6 y=y_train
7)
Out:

0%| | 0/4 [00:00<?, ?it/s]

6. Model Deployment (Optional)

Deploy the model for real-time power output predictions.

1try:
2 deployment_response = client.deployments.deploy(
3 model_version_id=version_id
4 )
5 deployment_id = deployment_response.deployment_id
6except XplainableAPIError as e:
7 print(f"Error deploying model: {e}")
8
9# Activate deployment
10try:
11 client.deployments.activate_deployment(deployment_id=deployment_id)
12except XplainableAPIError as e:
13 print(f"Error activating deployment: {e}")
14
15# Generate deploy key for inference
16try:
17 deploy_key = client.deployments.generate_deploy_key(
18 deployment_id=deployment_id,
19 description="API key for Power Plant Energy Output",
20 days_until_expiry=30
21 )
22except XplainableAPIError as e:
23 print(f"Error generating deploy key: {e}")

7. Model Testing

Evaluate model performance on power output predictions.

1# Evaluate on test set
2test_predictions = model.predict(X_test)
3test_performance = model.evaluate(X_test, y_test)
4
5print("Test Set Performance:")
6for metric, value in test_performance.items():
7 print(f"{metric}: {value:.4f}")
8
9# Plot predictions vs actual
10plt.figure(figsize=(12, 5))
11
12# Scatter plot
13plt.subplot(1, 2, 1)
14plt.scatter(y_test, test_predictions, alpha=0.6)
15plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--', lw=2)
16plt.xlabel('Actual Energy Output (MW)')
17plt.ylabel('Predicted Energy Output (MW)')
18plt.title('Power Plant Energy Output: Predictions vs Actual')
19plt.grid(True, alpha=0.3)
20
21# Residuals plot
22plt.subplot(1, 2, 2)
23residuals = y_test - test_predictions
24plt.scatter(test_predictions, residuals, alpha=0.6)
25plt.axhline(y=0, color='r', linestyle='--')
26plt.xlabel('Predicted Energy Output (MW)')
27plt.ylabel('Residuals (MW)')
28plt.title('Residuals Plot')
29plt.grid(True, alpha=0.3)
30
31plt.tight_layout()
32plt.show()
33
34# Feature importance insights
35print("\nExpected feature importance patterns:")
37print("- Ambient Temperature (AT): Lower temperatures typically increase power output")
38print("- Exhaust Vacuum (V): Higher vacuum usually correlates with higher output")
39print("- Ambient Pressure (AP): Higher pressure tends to improve efficiency")
40print("- Relative Humidity (RH): Generally has less impact than temperature and pressure")